import hashlib
import json
import os
import sys

from utils import Config
# Build the news database: read one JSON record per line from
# Config.raw_news_path, keep the usable records keyed by the MD5 hex digest
# of their title, and hand the resulting dict to safe_pickle_dump.
#
# NOTE(review): encode_dict and safe_pickle_dump are not defined or imported
# anywhere in this file; presumably they are meant to come from utils --
# confirm and import them, otherwise this script fails with NameError.

new_db = {}
cnt = 0      # records stored (a repeated title is counted again)
skipped = 0  # lines dropped because they could not be parsed or processed

# Stream the file inside a context manager so the handle is always closed
# and the raw lines are never held in memory all at once. (The file used to
# be opened and fully read twice, with one copy never used.)
with open(Config.raw_news_path, 'rb') as raw_news:
    for line in raw_news:
        try:
            data = json.loads(line)
            # Filter out records with very short content or an empty title.
            if len(data['content']) < 50 or data['title'] == '':
                continue
            hash_md5 = hashlib.md5(data['title'].encode('utf-8'))
            # Identical titles hash to the same key, so a later duplicate
            # overwrites the earlier record.
            new_db[hash_md5.hexdigest()] = encode_dict(data)
            cnt += 1
        except (ValueError, KeyError, TypeError, AttributeError):
            # Best effort: a malformed line (invalid JSON, missing or
            # wrongly-typed fields, undecodable text) is skipped rather than
            # aborting the run. Programming errors such as NameError are
            # deliberately NOT caught here, so they surface instead of
            # silently producing an empty database.
            skipped += 1

sys.stderr.write('stored %d records, skipped %d unreadable lines\n'
                 % (cnt, skipped))

safe_pickle_dump(new_db, Config.db_path)







